import os
import re
import logging
import time
import pyautogui
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, NoSuchWindowException

# Route log records at DEBUG level and above to a file, stamped with time and level
logging.basicConfig(
    filename='selenium_document_download.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

def setup_driver():
    """Build and return a Chrome WebDriver with download-related preferences set.

    The Chrome profile preferences applied are:
    ``download.prompt_for_download`` = False,
    ``download.directory_upgrade`` = True and
    ``safebrowsing.enabled`` = True.

    Returns:
        The newly started ``webdriver.Chrome`` instance.
    """
    chrome_prefs = {
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    }
    chrome_options = Options()
    chrome_options.add_experimental_option("prefs", chrome_prefs)
    return webdriver.Chrome(options=chrome_options)

def sanitize_filename(filename):
    """Return *filename* cleaned up for use as a single Windows path component.

    The following are removed:

    * the reserved punctuation characters ``\\ / * ? : " < > |``;
    * ASCII control characters (code points 0-31) such as newlines and tabs,
      which are not allowed in Windows file names and which would be typed
      as key presses (e.g. Enter) if the name is later fed to a save dialog.

    The result is then cut to at most 100 characters, and trailing spaces
    and dots are trimmed because Windows does not accept names ending in
    either.

    Args:
        filename: The raw name, e.g. text scraped from a page.

    Returns:
        The sanitized name. It may be an empty string if nothing valid
        remains.
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', "", filename)
    # Truncate before trimming so the cut itself cannot leave a trailing
    # space or dot behind.
    return cleaned[:100].rstrip(" .")

def get_unique_filename(path, filename):
    """Pick a name derived from *filename* that is not yet present in *path*.

    If ``path/filename`` does not exist, *filename* is returned unchanged.
    Otherwise ``_1``, ``_2``, ... is inserted before the extension until a
    name is found that does not exist.

    Args:
        path: Directory in which the name must be unused.
        filename: Preferred file name (no directory part).

    Returns:
        The first candidate name that does not exist inside *path*.
    """
    stem, suffix = os.path.splitext(filename)
    candidate = filename
    attempt = 0
    while os.path.exists(os.path.join(path, candidate)):
        attempt += 1
        candidate = f"{stem}_{attempt}{suffix}"
    return candidate

def truncate_path(path, max_length=150):
    """Shorten *path* so that it is at most *max_length* characters long.

    Paths that already fit are returned unchanged. Otherwise the part
    before the extension (as determined by ``os.path.splitext``) is cut so
    that the extension is preserved and the result is exactly *max_length*
    characters long.

    If the extension alone is longer than *max_length* (for example when a
    long title merely contains a dot), keeping it is impossible; the path
    is then simply cut to its first *max_length* characters.

    Args:
        path: The path to shorten.
        max_length: Maximum allowed length of the returned string.

    Returns:
        The original or shortened path, never longer than *max_length*.
    """
    if len(path) <= max_length:
        return path

    base, ext = os.path.splitext(path)
    keep = max_length - len(ext)
    if keep < 0:
        # A negative slice bound would trim from the end of ``base`` and
        # leave the result over the limit, so fall back to a plain cut.
        return path[:max(max_length, 0)]
    return base[:keep] + ext

def login(driver, email, password):
    """Open the login page and submit the given credentials.

    Each form control is waited for (up to 30 seconds) before it is used:
    the ``email`` and ``password`` inputs until visible, the button whose
    text contains ``Login`` until clickable.

    Args:
        driver: The WebDriver used to drive the browser.
        email: Value typed into the element with id ``email``.
        password: Value typed into the element with id ``password``.
    """
    wait = WebDriverWait(driver, 30)

    logging.info("Navigating to login page")
    driver.get('https://www.dpiarchive.com/#/login')

    email_field = wait.until(EC.visibility_of_element_located((By.ID, 'email')))
    email_field.send_keys(email)
    logging.info("Entered email")

    password_field = wait.until(EC.visibility_of_element_located((By.ID, 'password')))
    password_field.send_keys(password)
    logging.info("Entered password")

    login_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Login')]"))
    )
    login_button.click()
    logging.info("Clicked login button")

def wait_for_file_to_load(driver, timeout=300):
    """Block until the current page contains an embedded document element.

    Waits up to *timeout* seconds for any ``embed``, ``iframe``, ``object``,
    ``img`` or ``video`` element to be present, then sleeps a further 10
    seconds. A timeout is logged as an error and otherwise ignored; no
    exception is raised for it and nothing is returned.

    Args:
        driver: The WebDriver whose current window is inspected.
        timeout: Maximum number of seconds to wait for the element.
    """
    viewer_locator = (By.CSS_SELECTOR, 'embed, iframe, object, img, video')
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(viewer_locator)
        )
    except TimeoutException:
        logging.error("Timeout waiting for file to load")
    else:
        # Extra grace period after the element shows up
        time.sleep(10)

def retry_save_dialog(save_path):
    """Drive the Ctrl+S save dialog, trying up to three times.

    Each attempt sends Ctrl+S, waits 5 seconds, types *save_path*, presses
    Enter and waits 10 seconds before checking whether *save_path* exists
    on disk. Any exception raised during an attempt is logged, followed by
    a 5 second pause before the next attempt.

    Args:
        save_path: Full path typed into the save dialog and checked afterwards.

    Returns:
        True as soon as *save_path* exists after an attempt, False if it
        still does not exist after three attempts.
    """
    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            pyautogui.hotkey('ctrl', 's')
            time.sleep(5)  # let the save dialog open
            pyautogui.write(save_path)
            pyautogui.press('enter')
            time.sleep(10)  # let the save finish
            file_saved = os.path.exists(save_path)
        except Exception as e:
            logging.error(f"Error during save dialog: {e}")
            time.sleep(5)
        else:
            if file_saved:
                return True
    return False

def download_documents(driver, url, base_download_path, category_relative_path):
    """Save every document listed on *url* into the category's folder.

    The page at *url* is opened and its document rows are processed one by
    one: the row is clicked, the expand icon is clicked, the script switches
    to the last window handle, waits for the document to load and saves it
    through the browser's save dialog under
    ``base_download_path/category_relative_path``. After each successful
    save the listing page is reloaded, so the rows are looked up again on
    every iteration.

    Error handling:

    * ``StaleElementReferenceException`` restarts the listing lookup, at
      most three times in total. Processing resumes at the first document
      that has not been saved yet, so earlier documents are not downloaded
      a second time.
    * If the listing contains fewer rows after a reload than before, the
      remaining indexes are skipped with a warning.
    * A failed save stops processing of this URL.
    * Any other exception is logged and ends processing of this URL.
    * The viewer window is closed and focus is returned to the original
      window even when an error occurs while the viewer is open. The
      original window itself is never closed.

    Args:
        driver: The WebDriver used to drive the browser.
        url: Address of the page listing the documents.
        base_download_path: Root directory for all downloads.
        category_relative_path: Sub-path (relative to the root) for this URL.

    Returns:
        None.
    """
    row_locator = (By.CSS_SELECTOR, 'tr.cursor-pointer td.p-3.flex-1.text-ellipsis.overflow-auto')
    expand_locator = (By.CSS_SELECTOR, 'i.fa-solid.fa-expand')
    category_path = os.path.join(base_download_path, category_relative_path)

    logging.info(f"Navigating to URL: {url}")
    driver.get(url)
    time.sleep(10)  # Ensure page is fully loaded

    next_index = 0  # first row not yet saved; kept across stale-element retries
    retry_count = 3
    while retry_count > 0:
        try:
            document_elements = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located(row_locator)
            )
            logging.info(f"Found {len(document_elements)} document elements")

            for index in range(next_index, len(document_elements)):
                # The page is reloaded after every save, which invalidates
                # previously found elements, so fetch the rows afresh.
                document_elements = WebDriverWait(driver, 30).until(
                    EC.presence_of_all_elements_located(row_locator)
                )
                if index >= len(document_elements):
                    logging.warning(
                        f"Document list shrank to {len(document_elements)} rows; "
                        f"stopping at index {index}"
                    )
                    break
                document_element = document_elements[index]
                document_name = sanitize_filename(document_element.text.strip())
                unique_document_name = get_unique_filename(category_path, document_name)
                logging.info(f"Processing document: {document_name}")
                document_element.click()
                time.sleep(5)  # Increased sleep to ensure the document page loads

                download_icon = WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located(expand_locator)
                )
                # Remember where we came from so cleanup can return here and
                # so the original window is never closed by mistake.
                main_window = driver.current_window_handle
                download_icon.click()
                logging.info("Clicked download icon")

                driver.switch_to.window(driver.window_handles[-1])
                try:
                    wait_for_file_to_load(driver)
                    logging.info("Document loaded, triggering save dialog")

                    # Build full path for saving the file
                    os.makedirs(category_path, exist_ok=True)
                    full_path = os.path.join(category_path, unique_document_name)
                    full_path = truncate_path(full_path)  # Ensure path length is within limits

                    # Trigger save dialog with full path
                    saved = retry_save_dialog(full_path)
                    if saved:
                        logging.info(f"Downloaded document: {unique_document_name}")
                    else:
                        logging.error(f"Failed to download document: {unique_document_name}")
                finally:
                    # Always leave the browser on the original window, closing
                    # the viewer only if it really is a separate window.
                    if driver.current_window_handle != main_window:
                        driver.close()
                    driver.switch_to.window(main_window)

                if not saved:
                    return  # Exit on failure to avoid endless loop

                next_index = index + 1
                logging.info(f"Reloading URL: {url} to refresh document list")
                driver.get(url)  # Reload the page to refresh document elements
                time.sleep(10)  # Ensure page is fully loaded
            break  # Break if no exception
        except NoSuchWindowException as e:
            logging.error(f"NoSuchWindowException caught: {e}")
            driver.switch_to.window(driver.window_handles[0])
            logging.info("Switched back to the main window after NoSuchWindowException")
            break
        except StaleElementReferenceException as e:
            logging.error(f"StaleElementReferenceException caught: {e}")
            retry_count -= 1
            if retry_count == 0:
                logging.error("Maximum retries reached. Could not process the documents.")
        except (TimeoutException, NoSuchElementException) as e:
            logging.error(f"Error downloading documents: {e}")
            break
        except Exception as e:
            logging.error(f"Unexpected error: {e}")
            break

def process_urls(file_path, driver, base_download_path):
    """Walk an indented category/URL listing and download each listed URL.

    The file is read line by line. After removing leading whitespace, two
    kinds of lines are recognised; everything else is ignored:

    * ``Category: <name>`` opens a category. Its nesting level is given by
      the number of leading spaces: a category replaces any previously seen
      categories with the same or deeper indentation and becomes a child of
      the nearest one with shallower indentation. Any consistent indentation
      width works. The name is passed through ``sanitize_filename``.
    * ``URL: <url>`` triggers ``download_documents`` for that URL, using the
      current category chain joined into a relative path. Lines whose URL
      is ``Not Available``, or that appear before any category, are skipped.

    Everything after the ``Category: `` / ``URL: `` prefix is used, even if
    the prefix text occurs again later in the line.

    Args:
        file_path: Path of the text file containing the listing.
        driver: The WebDriver handed through to ``download_documents``.
        base_download_path: Root directory handed to ``download_documents``.
    """
    category_prefix = "Category: "
    url_prefix = "URL: "

    current_category = []  # sanitized category names, outermost first
    category_depths = []   # indentation of each entry in current_category

    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.rstrip()
            stripped = line.lstrip()
            depth = len(line) - len(line.lstrip(' '))

            if stripped.startswith(category_prefix):
                category_name = sanitize_filename(stripped[len(category_prefix):])
                # Leave every category that is a sibling of, or nested
                # deeper than, the one being opened.
                while category_depths and category_depths[-1] >= depth:
                    category_depths.pop()
                    current_category.pop()
                category_depths.append(depth)
                current_category.append(category_name)

            elif stripped.startswith(url_prefix):
                url = stripped[len(url_prefix):]
                if url != "Not Available" and current_category:
                    category_relative_path = os.path.join(*current_category)
                    download_documents(driver, url, base_download_path, category_relative_path)

# Script body: log in, work through the category listing, then always shut the browser down.
# NOTE(review): the account e-mail and a password placeholder are hard-coded below.
base_download_path = "E:\\Greer"  # root folder that receives every download
driver = setup_driver()

try:
    login(driver, 'isaackoi@gmail.com', 'INSERTPASSWORD')
    time.sleep(10)  # pause so the post-login page can finish loading
    process_urls('E:\\Greer\\temp.txt', driver, base_download_path)
finally:
    driver.quit()
    # Report completion to the log first, then to the console
    for emit in (logging.info, print):
        emit("Script execution finished. Check the log for details.")